The graphs below don’t have proper titles, axis labels, legends, etc. Please take care to do this on your own graphs.
We can create static maps (road maps, satellite view maps, hybrid maps) with the ggmap package:
library(ggplot2)
library(ggmap)
## Google Maps API Terms of Service: http://developers.google.com/maps/terms.
## Please cite ggmap if you use it: see citation("ggmap") for details.
map_base <- get_map(location = c(lon = -79.944248, lat = 40.4415861),
color = "color",
source = "google",
maptype = "road",
zoom = 16)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=40.441586,-79.944248&zoom=16&size=640x640&scale=2&maptype=roadmap&language=en-EN
map_object <- ggmap(map_base,
extent = "device",
ylab = "Latitude",
xlab = "Longitude")
map_object
Here, we’ll work with airline data from this GitHub repository.
Max Marchi wrote a great summary of using maps in R with ggmap here. We’ll follow this example closely in today’s lecture. Thanks, Max!
Before we begin, note that this is just one example of how you can add interesting information to maps with ggmap. As long as you have latitude and longitude information, you should be able to add data to maps. For more interesting examples and for an in-depth description of ggmap, see the short paper by David Kahle and Hadley Wickham here.
To load data from GitHub, you should navigate to the raw file, copy the URL, and use read.csv().
# Load and format airports data
airports <- read.csv("https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat", header = FALSE)
colnames(airports) <- c("ID", "name", "city", "country", "IATA_FAA", "ICAO", "lat", "lon", "altitude", "timezone", "DST", "misc_loc")
head(airports)
## ID name city country IATA_FAA
## 1 1 Goroka Goroka Papua New Guinea GKA
## 2 2 Madang Madang Papua New Guinea MAG
## 3 3 Mount Hagen Mount Hagen Papua New Guinea HGU
## 4 4 Nadzab Nadzab Papua New Guinea LAE
## 5 5 Port Moresby Jacksons Intl Port Moresby Papua New Guinea POM
## 6 6 Wewak Intl Wewak Papua New Guinea WWK
## ICAO lat lon altitude timezone DST misc_loc
## 1 AYGA -6.081689 145.3919 5282 10 U Pacific/Port_Moresby
## 2 AYMD -5.207083 145.7887 20 10 U Pacific/Port_Moresby
## 3 AYMH -5.826789 144.2959 5388 10 U Pacific/Port_Moresby
## 4 AYNZ -6.569828 146.7262 239 10 U Pacific/Port_Moresby
## 5 AYPY -9.443383 147.2200 146 10 U Pacific/Port_Moresby
## 6 AYWK -3.583828 143.6692 19 10 U Pacific/Port_Moresby
# Load and format routes data
routes <- read.csv("https://raw.githubusercontent.com/jpatokal/openflights/master/data/routes.dat", header=F)
colnames(routes) <- c("airline", "airlineID", "sourceAirport", "sourceAirportID", "destinationAirport", "destinationAirportID", "codeshare", "stops", "equipment")
head(routes)
## airline airlineID sourceAirport sourceAirportID destinationAirport
## 1 2B 410 AER 2965 KZN
## 2 2B 410 ASF 2966 KZN
## 3 2B 410 ASF 2966 MRV
## 4 2B 410 CEK 2968 KZN
## 5 2B 410 CEK 2968 OVB
## 6 2B 410 DME 4029 KZN
## destinationAirportID codeshare stops equipment
## 1 2990 0 CR2
## 2 2990 0 CR2
## 3 2962 0 CR2
## 4 2990 0 CR2
## 5 4078 0 CR2
## 6 2990 0 CR2
Here, we’ll do some data manipulation to obtain the number of arrivals/departures per airport.
# Manipulate the routes data to create two new data.frames -- one for arrivals, one for departures. Each counts the number of flights arriving/departing from each airport.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
departures <- routes %>%
dplyr::group_by(sourceAirportID) %>%
dplyr::summarize(flights = n()) %>%
mutate(sourceAirportID = as.integer(as.vector(sourceAirportID)))
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
arrivals <- routes %>%
dplyr::group_by(destinationAirportID) %>%
dplyr::summarize(flights = n()) %>%
mutate(destinationAirportID = as.integer(as.vector(destinationAirportID)))
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
# Merge each of the arrivals/departures data.frames with the airports data.frame above
airportD <- left_join(airports, departures, by = c("ID" = "sourceAirportID"))
airportA <- left_join(airports, arrivals, by = c("ID" = "destinationAirportID"))
# Create data.frame of routes to/from a specific city
my_airport_code <- "PIT"
my_routes <- dplyr::filter(routes,
sourceAirport == my_airport_code |
destinationAirport == my_airport_code)
# Add in relevant information from the airports data.frame
# Do this in two steps, so that you first pull in the source airport information
# and then pull in the destination airport information
# This can be done easily with two calls to left_join()
# The rest of the code is just formatting
my_airport_data <- my_routes %>%
left_join(airports, by = c("sourceAirport" = "IATA_FAA")) %>%
dplyr::select(destinationAirport, lat, lon, timezone) %>%
dplyr::rename(source_lat = lat, source_lon = lon, source_timezone = timezone) %>%
left_join(airports, by = c("destinationAirport" = "IATA_FAA")) %>%
dplyr::select(source_lat, source_lon, source_timezone, lat, lon, timezone) %>%
dplyr::rename(dest_lat = lat, dest_lon = lon, dest_timezone = timezone)
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factors with different levels, coercing to character vector
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factors with different levels, coercing to character vector
We’ll use ggmap to create a base map of the data. Now, we’ll add layers of points onto the map.
# Use ggmap to visualize the European flights
library(ggmap)
map <- get_map(location = 'Europe', zoom = 4)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=Europe&zoom=4&size=640x640&scale=2&maptype=terrain&language=en-EN
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Europe
map <- get_map(location = 'United States', zoom = 4)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=United+States&zoom=4&size=640x640&scale=2&maptype=terrain&language=en-EN
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=United%20States
# Visualize the basic map
ggmap(map)
# Add points to the map of departures
# Each point will be located at the lat/long of the airport
# The size of the points is proportional to the square root of the number of flights at that airport
mapPoints <- ggmap(map) +
geom_point(aes(x = lon, y = lat, size = sqrt(flights), color = DST),
data = airportD, alpha = .5)
# Add a custom legend to the plot
mapPointsLegend <- mapPoints +
scale_size_area(breaks = sqrt(c(1, 5, 10, 50, 100, 500)),
labels = c(1, 5, 10, 50, 100, 500),
name = "departing routes")
mapPointsLegend
## Warning: Removed 7477 rows containing missing values (geom_point).
Below, we create a new variable (flight type – arrival or departure) and use it to create a single data.frame of all arrivals and departures.
We’ll then facet on this variable so we can simultaneously visualize both arriving and departing flights.
# Create a data.frame containing both departures and arrivals
# Do this by creating a "type" variable and then combining the two data.frames
# We will later be able to use the type variable for facetting
airportD$type <- "departures"
airportA$type <- "arrivals"
airportDA <- rbind(airportD, airportA)
# Create our base plot and add the custom legend
mapPointsDA <- ggmap(map) +
geom_point(aes(x = lon, y = lat, size = sqrt(flights)),
data = airportDA, alpha = .5) +
scale_size_area(breaks = sqrt(c(1, 5, 10, 50, 100, 500)),
labels = c(1, 5, 10, 50, 100, 500),
name = "routes")
# Facet on flight type (arrival/departure)
mapPointsDA + facet_grid(. ~ type)
## Warning: Removed 14952 rows containing missing values (geom_point).
ggmapgc <- geocode("area 51")
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=area%2051
map <- get_map(gc, zoom = 6)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=45.242965,-89.677783&zoom=6&size=640x640&scale=2&maptype=terrain&language=en-EN
ggmap(map) +
geom_point(aes(x = lon, y = lat),
data = gc, size = 5, color = "red")
What is a polygon?
geom_polygon()In ggplot(), polygons are just another geometry, making it really easy to add geographic shapes (e.g. corresponding to countries, states, counties, etc.) to maps.
# Note: The sp package can be really fussy at installation
# If prompted, do not restart R when installing the package
# If prompted with "Do you want to install from sources the packages which need compilation?", type 'n'
#install.packages("sp", dependencies = T)
library(sp)
library(ggmap)
us_data <- map_data("state")
county_data <- map_data("county")
us_county_map <- ggplot() +
geom_polygon(aes(long, lat, group=group), fill = "blue", size = 4,
data = county_data) +
geom_polygon(aes(long, lat, group=group), color = 'white',
fill = NA, data = us_data) +
theme_bw() + theme(axis.text = element_blank(),
axis.title = element_blank())
us_county_map
coord_map() to specify your map projectionWay back at the beginning of the semester, we learned about using coord_cartesian() and coord_polar() to specify the coordinates of our plots. We’re (finally) revisiting this idea in the context of geographic mapping.
Using the coord_map() function, we can specify what kind of map projection we want to use.
us_county_map + coord_map("mercator")
us_county_map + coord_map("polyconic")
us_county_map + coord_map("sinusoidal")